*------------------------------------------------------------------------------
* Dollars, Desks, and Development: The Impact of Local Sales Tax Revenue Redistribution on Recipient Programs in North Carolina

* Authors: Alex Combs and Whitney Afonso

* Do file for merging intermediate data files and constructing analytical sample and variables
*------------------------------------------------------------------------------

clear

*------------------------------------------------------------------------------
* Merge
*------------------------------------------------------------------------------
* Base file is nc_lst; every other file is matched onto it by county-year
* (or by year alone for the CPI deflator).
use nc_lst

* nc_afir, keyed on county name and year. Unmatched rows from either side stay.
merge 1:1 cty_name year using nc_afir, nogenerate

* Year-level deflator. Years that appear only in the CPI file are not kept.
merge m:1 year using cpi20102022, keepusing(deflator_cpi2022) ///
	keep(master match) nogenerate

* Property tax rate. Rows that appear only in propertytaxdata are not kept.
merge 1:1 cty_name year using propertytaxdata, keepusing(taxrate) ///
	keep(master match) nogenerate

* Census controls are keyed on cty_fips rather than cty_name.
* Unmatched rows from either side stay.
merge 1:1 cty_fips year using nc_census_controls, nogenerate

*------------------------------------------------------------------------------
* Generate variables
*------------------------------------------------------------------------------

* Per capita (pc_) and deflated per capita (r_pc_) versions of:
*   ART44524       - continuous revenue received treatment variable
*   net_lst_redist - net redistributed revenue (used for the group variable)
* Each new pair is placed directly after its source variable.
foreach v in ART44524 net_lst_redist {
	generate pc_`v' = `v'/pop
	generate r_pc_`v' = pc_`v'/deflator_cpi2022
	order pc_`v' r_pc_`v', after(`v')
}

* Net redistributed revenue groups.
* Works on a county-level copy of the year>=2017 rows: the county mean of
* pc_net_lst_redist is split into terciles separately for net gainers
* (win_terc) and net losers (lose_terc), then merged back onto the panel.
preserve
keep if year>=2017
* Stata treats missing as larger than any number, so an unguarded
* (net_lst_redist>0) would code county-years with missing net_lst_redist as
* net gainers. Leave those rows missing; (firstnm) below then skips them.
gen net_gain = (net_lst_redist>0) if !missing(net_lst_redist)
* NOTE(review): net_gain is taken from the first nonmissing row per county,
* while the tercile ranking uses the county mean - confirm net_gain does not
* vary within county over these years.
collapse (mean) pc_net_lst_redist (firstnm) net_gain, by(cty_number)
xtile win_terc = pc_net_lst_redist if net_gain==1, nq(3)
xtile lose_terc = pc_net_lst_redist if net_gain==0, nq(3)
drop pc_net_lst_redist
tempfile win_loss_terciles
save `win_loss_terciles'
restore

merge m:1 cty_number using `win_loss_terciles'
drop _merge

* treat_group: 1-3 = tercile among net gainers, 0 = everything else
* (net losers and counties with no win_terc assigned).
gen treat_group = 0
replace treat_group = win_terc if inlist(win_terc, 1, 2, 3)

* Trend groups that correspond to Figure 1 map.
* Each county is binned on its 2017 value of r_pc_ART44524:
*   1 = (0, 6.5]   2 = (6.5, 20]   3 = (20, 30]   4 = (30, 60]
* Anything outside those intervals (nonpositive, above 60, or missing)
* stays in group 0.
preserve
keep if year==2017
generate receive_group = 0
local lower 0
local grp 0
foreach upper in 6.5 20 30 60 {
	local ++grp
	replace receive_group = `grp' if r_pc_ART44524>`lower' & r_pc_ART44524<=`upper'
	local lower `upper'
}
keep cty_number receive_group
tempfile receive_group
save `receive_group'
restore

* Attach the 2017-based group to every year of the county.
merge m:1 cty_number using `receive_group', nogenerate

* Treatment dummy: 1 if the county is in any positive receive_group bin.
* Missing sorts above every number in Stata, so receive_group>0 on its own
* would flag counties with no receive_group (no 2017 row to merge from) as
* treated. Their treatment status is left missing instead.
gen receive_treat = (receive_group>0) if !missing(receive_group)
* The missing option makes any unassigned county-years visible in the check.
tab year receive_treat, missing

* Post dummy: 1 from 2017 onward
gen post = (year>=2017)

* Exclusion of net loss receivers (nlr) dummy: 1 for receive_group 1 only
gen nlr = (receive_group==1)

* Additional variables
* NOTE: the creation order here and the order commands below determine the
* variable positions that later positional ranges (rev_394042-k12op,
* k12cap-exp_econdev) rely on; do not rearrange without checking them.

* Total revenue net of TOTAL and property tax revenue
* NOTE(review): TOTAL is presumably the total local sales tax amount - confirm
gen rev_less_lstprop = rev_total - TOTAL - rev_prop_tax
* Total revenue excluding ART44524
gen all_oth_rev = rev_total - ART44524
* all_oth_rev less the part of rev_394042 that is not ART44524
gen rev_control_lst = all_oth_rev-(rev_394042-ART44524)
* all_oth_rev less other tax, sales and services, intergovernmental,
* debt proceeds, and other miscellaneous revenue
gen rev_control_onoprop = all_oth_rev-(rev_other_tax + /// 
 rev_sales_services + rev_intergov + rev_debt_proceeds + rev_other_misc)
* all_oth_rev less property tax revenue
gen rev_control_prop = all_oth_rev-rev_prop_tax
* Shorter names for the school capital and current operating expenditures
rename expobj_school_capital k12cap
rename expobj_school_currentopt k12op
* All school expenditure objects combined
gen exp_k12 = k12cap + k12op + expobj_school_allother
* School capital plus current operating only
gen k12opcap = k12cap + k12op
* Sum of school (capital + operating), community college, and economic
* development spending; exp_nonearmark is the remainder of exp_total
gen exp_earmark = k12opcap + exp_commcoll + exp_econdev
gen exp_nonearmark = exp_total - exp_earmark
* Residual share after Hispanic, white, and Black percentages
gen pct_orace = 100-(pct_hispanic+pct_white+pct_black)
* Component revenue variables are no longer needed once the controls exist
* (rev_sales_tax is dropped here as well although it is not used above)
drop rev_other_tax rev_sales_tax rev_sales_services rev_intergov /// 
 rev_debt_proceeds rev_other_misc
 
* Reorder variables
* The final layout puts k12opcap and k12op immediately after exp_nonearmark,
* followed by k12cap, expobj_school_allother, exp_commcoll, exp_econdev, and
* then the deflator_cpi2022-pop block.
order pct_orace, after(pct_black)
order net_gain-nlr, after(r_pc_net_lst_redist)
order deflator_cpi2022-pop, after(exp_nonearmark)
order k12opcap k12op k12cap expobj_school_allother exp_commcoll exp_econdev, /// 
 after(exp_nonearmark)
order capital524use, after(pop)

* Rescale per capita and log transform
* Two positional variable ranges get the same treatment: per capita (pc_),
* deflated per capita (r_pc_), and log of deflated per capita (log_r_pc_),
* each placed right after its source variable.
*   Pass 1 (rev_394042 through k12op):      log is ln(x)
*   Pass 2 (k12cap through exp_econdev):    log is ln(x+1)
local range1 rev_394042-k12op
local add1 ""
local range2 k12cap-exp_econdev
local add2 "+1"

forvalues p = 1/2 {
	foreach v of varlist `range`p'' {
		generate pc_`v' = `v'/pop
		generate r_pc_`v' = pc_`v'/deflator_cpi2022
		generate log_r_pc_`v' = ln(r_pc_`v'`add`p'')
		order pc_`v' r_pc_`v' log_r_pc_`v', after(`v')
	}
}

generate log_pop = ln(pop)

*------------------------------------------------------------------------------
* Analytical sample
*------------------------------------------------------------------------------

* Counties removed from the analytical sample:
*   2 counties missing post-treatment AFIR data
*   1 county missing 2017 and 2018, leaving only 2019 after Wayfair case
*   3 counties excluded by dCDH model due to timing of missing data
keep if !inlist(cty_number, 24, 40, 47, 66, 78, 82)

* 10 additional pre-treatment observations excluded from dCDH model.
* dcdh_sample is 0 for those county-years and 1 for every other row.
generate dcdh_sample = !( ///
	(inlist(cty_number, 37, 71) & year==2014) | ///
	(inlist(cty_number, 8, 33) & year==2013) | ///
	(inlist(cty_number, 4, 48, 57) & year<=2014) )

* Write the analysis file and leave memory empty.
save analysis_data, replace

clear
